{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "import os\n",
    "from io import BytesIO\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from PIL import Image\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.manifold import TSNE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_embedding(dataset,\n",
    "                       iterations,\n",
    "                       perplexity,\n",
    "                       pca_dim,\n",
    "                       learning_rate,\n",
    "                       verbose=1):\n",
    "    \"\"\"Compute a 3-D t-SNE embedding of ``dataset`` and cache it as a CSV file.\n",
    "\n",
    "    Reads ``../data/{dataset}_input.csv`` and ``../data/{dataset}_labels.csv``,\n",
    "    reduces the inputs with PCA, embeds the result with t-SNE and writes the\n",
    "    coordinates (columns ``x``, ``y``, ``z``, indexed by the labels) to\n",
    "    ``data.csv`` inside a directory named after the hyper-parameters.\n",
    "    If that ``data.csv`` already exists, nothing is recomputed.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dataset : str\n",
    "        Prefix of the input and label CSV files under ``../data``.\n",
    "    iterations : int\n",
    "        Passed to ``TSNE`` as ``n_iter``.\n",
    "    perplexity : int or float\n",
    "        Passed to ``TSNE`` as ``perplexity``.\n",
    "    pca_dim : int\n",
    "        Upper bound on the number of PCA components kept before t-SNE.\n",
    "    learning_rate : int or float\n",
    "        Passed to ``TSNE`` as ``learning_rate``.\n",
    "    verbose : int or bool, default 1\n",
    "        When truthy, status messages are printed.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "    \"\"\"\n",
    "    path = f'../demo_embeddings/{dataset}/iterations_{iterations}/perplexity_{perplexity}/pca_{pca_dim}/learning_rate_{learning_rate}'\n",
    "    output_file = os.path.join(path, 'data.csv')\n",
    "\n",
    "    # Named `log` rather than `display` so IPython's display() is not shadowed.\n",
    "    def log(message):\n",
    "        if verbose:\n",
    "            print(message)\n",
    "\n",
    "    if os.path.exists(output_file):\n",
    "        log(f'{dataset} already exists.')\n",
    "        return\n",
    "\n",
    "    data = pd.read_csv(f\"../data/{dataset}_input.csv\")\n",
    "    labels = pd.read_csv(f\"../data/{dataset}_labels.csv\")\n",
    "    nb_row, nb_col = data.shape\n",
    "\n",
    "    # PCA cannot keep more components than there are samples or features.\n",
    "    pca = PCA(n_components=min(nb_row, nb_col, pca_dim))\n",
    "    data_pca = pca.fit_transform(data.values)\n",
    "\n",
    "    # NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter`;\n",
    "    # confirm against the installed version before upgrading.\n",
    "    tsne = TSNE(n_components=3,\n",
    "                n_iter=iterations,\n",
    "                learning_rate=learning_rate,\n",
    "                perplexity=perplexity,\n",
    "                random_state=1131)\n",
    "    embedding = tsne.fit_transform(data_pca)\n",
    "\n",
    "    embedding_df = pd.DataFrame(embedding, columns=['x', 'y', 'z'])\n",
    "\n",
    "    # `labels.values` is 2-D (rows x columns) but an index must be 1-D,\n",
    "    # so a single-column labels file is flattened to that column.\n",
    "    label_values = labels.values\n",
    "    if label_values.ndim == 2 and label_values.shape[1] == 1:\n",
    "        label_values = label_values[:, 0]\n",
    "    embedding_df.index = label_values\n",
    "\n",
    "    # Create the directory only once there is something to write, so a failed\n",
    "    # read or fit does not leave an empty directory behind.\n",
    "    os.makedirs(path, exist_ok=True)\n",
    "    embedding_df.to_csv(output_file)\n",
    "\n",
    "    log(f'{path} has been generated.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate (or reuse) the embedding that the following cells load from\n",
    "# .../learning_rate_10/data.csv, so the notebook runs from a fresh kernel.\n",
    "generate_embedding('mnist_3000', iterations=250, perplexity=3, pca_dim=25, learning_rate=10, verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Hyper-parameters that identify which cached embedding to inspect.\n",
    "dataset, iterations, perplexity, pca_dim, learning_rate = 'mnist_3000', 250, 3, 25, 10\n",
    "\n",
    "# Same directory layout that generate_embedding writes to.\n",
    "path = f'../demo_embeddings/{dataset}/iterations_{iterations}/perplexity_{perplexity}/pca_{pca_dim}/learning_rate_{learning_rate}'\n",
    "\n",
    "# The first CSV column is used as the row index.\n",
    "pd.read_csv(f'{path}/data.csv', index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %timeit runs its statement in a throwaway scope, so names assigned inside it\n",
    "# never reach the notebook namespace. Load the frames with plain assignments\n",
    "# first, then time the expressions on their own.\n",
    "# NOTE(review): generate_embedding reads its input from ../data/ - confirm that\n",
    "# mnist_3000_input.csv is also present next to this notebook.\n",
    "df = pd.read_csv(\"mnist_3000_input.csv\")\n",
    "df2 = pd.read_csv('../demo_embeddings/mnist_3000/iterations_250/perplexity_3/pca_25/learning_rate_10/data.csv')\n",
    "\n",
    "%timeit pd.read_csv(\"mnist_3000_input.csv\")\n",
    "%timeit pd.read_csv('../demo_embeddings/mnist_3000/iterations_250/perplexity_3/pca_25/learning_rate_10/data.csv')\n",
    "%timeit df.join(df2.loc[:,'x':'z']).set_index(['x','y','z'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the cached embedding; index_col is not passed here, so the label\n",
    "# column stays a regular column.\n",
    "df2 = pd.read_csv(\n",
    "    '../demo_embeddings/mnist_3000/iterations_250/perplexity_3/pca_25/learning_rate_10/data.csv'\n",
    ")\n",
    "df2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First row of the input frame, viewed as a 28 x 28 grid.\n",
    "image = df.iloc[0]\n",
    "matrix = np.reshape(image.values, (28, 28))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Reference coordinates, written with six decimals.\n",
    "di = {'x': -3.510562, 'y': 1.200590}\n",
    "\n",
    "def compare(coord, tol=1e-6):\n",
    "    \"\"\"Return a boolean mask of the rows of ``df2`` whose ``coord`` value is within ``tol`` of ``di[coord]``.\n",
    "\n",
    "    A tolerance is used instead of ``==`` because the reference values carry only\n",
    "    six decimals, so exact equality misses any stored value with more digits.\n",
    "    \"\"\"\n",
    "    return (df2[coord] - di[coord]).abs() <= tol\n",
    "\n",
    "print(di['y'])\n",
    "df2[compare('y')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample hover payload: one point with x/y/z coordinates plus trace metadata.\n",
    "hoverData = {\n",
    "    'points': [\n",
    "        {\n",
    "            'x': 0.86785585,\n",
    "            'y': 2.3639283,\n",
    "            'z': 1.0667368,\n",
    "            'curveNumber': 4,\n",
    "            'pointNumber': 171,\n",
    "            'text': 'Digit 4',\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "\n",
    "# Coordinates of the first hovered point as a float64 vector ordered x, y, z.\n",
    "hover_point_np = np.asarray(\n",
    "    [hoverData['points'][0][axis] for axis in ('x', 'y', 'z')],\n",
    "    dtype=np.float64,\n",
    ")\n",
    "hover_point_np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Match the hovered coordinates against the embedding with a small tolerance;\n",
    "# exact float equality can miss when the two sources carry different precision.\n",
    "mask = (df2.loc[:, 'x':'z'] - hover_point_np).abs().le(1e-6).all(axis=1)\n",
    "\n",
    "matches = df2.index[mask]\n",
    "if len(matches) == 0:\n",
    "    raise LookupError(f'No embedding row matches the hovered point {hover_point_np}.')\n",
    "\n",
    "# Input row for the first match, reshaped to a 28 x 28 float grid and rendered.\n",
    "image = df.iloc[matches[0]].values.reshape(28, 28).astype(np.float64)\n",
    "Image.fromarray(np.uint8(255 * image))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "from PIL import Image\n",
    "from io import BytesIO\n",
    "\n",
    "def numpy_to_b64(array, scalar=True):\n",
    "    \"\"\"Encode ``array`` as a PNG image and return the bytes as a base64 text string.\n",
    "\n",
    "    When ``scalar`` is truthy the array is first multiplied by 255 and cast to\n",
    "    ``uint8``; otherwise it is handed to PIL unchanged.\n",
    "    \"\"\"\n",
    "    pixels = np.uint8(255 * array) if scalar else array\n",
    "\n",
    "    png_buffer = BytesIO()\n",
    "    Image.fromarray(pixels).save(png_buffer, format=\"png\")\n",
    "    encoded = base64.b64encode(png_buffer.getvalue())\n",
    "\n",
    "    return encoded.decode(\"utf-8\")\n",
    "\n",
    "numpy_to_b64(image)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
